*RCRA Nationwide Hedonic Study
*Parallel trends graphs
*Created: 5/26/2020
*Created by: Dennis Guignet
*Last Revised: 12/02/2022
*Last Revised by: Dennis Guignet

********************************************************************************

*This do-file takes the completed transaction dataset of all contiguous US transactions 
*	within five kilometers of a TSD facility under RCRA and generates estimates for
*	event study graphs to examine the parallel trends assumption. This includes 
*	Figure 5 in the main text, as well as Figure A7 in Appendix F.3. 

********************************************************************************
********************************************************************************


*Session setup: Stata settings, required path globals, covariate list, and
*	working directory for exported results.

*set empty cells for factor variables to drop
set emptycells drop
clear all
*increase max variables allowed b/c factor variables
set maxvar 100000

*Fail fast if a path global this do-file relies on has not been defined.
*	An undefined global expands to an empty string, so without this check -cd-
*	would not move to the intended results folder and -estimates save- could
*	only fail after the regression has already run.
foreach g in resultsfolder salesfolder raw_resultsfolder {
	if `"${`g'}"' == "" {
		display as error "global `g' is not defined; define it before running this do-file"
		exit 198
	}
}

*set key global variable groups

*house structure and local neighborhood vars
global house lnacres lnacres_miss stories stories_miss bathtot bathtot_miss lnsqft ///
	lnsqft_miss age agesq age_miss p_nbdev_2011_200 p_nbdev_2011_500 hwy500m

*set directory for results (esttab calls that give a bare file name write here)
cd "$resultsfolder"

********************************************************************************

*Create event study graphs corresponding to Figure 5 in main text of final paper. 

*Bring in dataset of homes w/in 5km of CA to estimate a variant of Model 4 w/
*	lead and lag coefficients. 
*Note: -use- documents -clear- (not -replace-) as the option that discards
*	whatever data are currently in memory before loading the file.
use "$salesfolder\All_Sales_Final_Cleaned_CA5k", clear
*report the number of observations loaded
count		


*Build the global variable lists used by the regression that follows.

*TSD control-group counts: one variable per 250-wide distance band from 0 to
*	5000 (cntTSD0_250, cntTSD250_500, ..., cntTSD4750_5000). The bands are
*	presumably in meters, given the 5km sample - confirm against the data build.
global cntTSD
forvalues lo = 0(250)4750 {
	local hi = `lo' + 250
	global cntTSD $cntTSD cntTSD`lo'_`hi'
}

*Lead (preyr) and lag (postyr) indicator lists in 5-year windows, one pair of
*	lists per distance bin (0_750, 750_1500) and per event (CA open, CA end).
*	The lag lists run through postyr30_35 for CA open and postyr20_25 for CA end.
local prewin       20_15 15_10 10_5 5_0
local postwin_open 0_5 5_10 10_15 15_20 20_25 25_30 30_35
local postwin_end  0_5 5_10 10_15 15_20 20_25
foreach b in 0_750 750_1500 {
	foreach ev in open end {
		local leads
		foreach w of local prewin {
			local leads `leads' dCA`ev'`b'_preyr`w'
		}
		local lags
		foreach w of local postwin_`ev' {
			local lags `lags' dCA`ev'`b'_postyr`w'
		}
		*globals are named CA<event>leads<bin> and CA<event>lags<bin>
		global CA`ev'leads`b' `leads'
		global CA`ev'lags`b' `lags'
	}
}
	
*Estimate the lead/lag variant of Model 4, keep it in memory and on disk, and
*	export the lead/lag coefficients.

*all lead and lag terms (both distance bins, CA open and CA end); reused in keep()
local evterms $CAopenleads0_750 $CAopenlags0_750 $CAopenleads750_1500 $CAopenlags750_1500 ///
	$CAendleads0_750 $CAendlags0_750 $CAendleads750_1500 $CAendlags750_1500
*absorbed terms: mycntyid-by-tranyr, mycntyid-by-quarter, and mytractid effects,
*	plus mycntyid-by-tranyr-specific slopes on the $house covariates
local fe i.mycntyid#i.tranyr i.mycntyid#i.quarter i.mytractid ///
	i.mycntyid#i.tranyr#c.($house)

*standard errors are clustered on mycntyid
reghdfe lnrprice $cntTSD `evterms', absorb(`fe') ///
	vce(cluster mycntyid)  compact poolsize(20)
eststo m4_trends5yr
estimates save "$raw_resultsfolder\m4_trends5yr", replace	
	
*Table version: coefficients with standard errors and significance stars,
*	restricted to the lead/lag terms.
esttab m4_trends5yr using EventStudy_CoefEstimates_Model4_5yrforTable.csv, ///
	replace label csv compress nogaps nolines ///
	star (* 0.10 ** 0.05 *** 0.01) b(4) se(4) scalars(ll) ///
	keep(`evterms') 
	
*Graph version: plain wide layout with confidence intervals and no stars,
*	restricted to the same terms.
esttab m4_trends5yr using EventStudy_CoefEstimates_Model4_5yrforGraph.csv, ///
	replace label plain csv compress nogaps nolines ///
	nostar b(4) ci(4) wide noparentheses ///
	keep(`evterms') 
	
*Pre-trend check for Model 4: for each event, test whether the difference
*	between the 0_750 and 750_1500 coefficients is equal across the four
*	pre-event windows (i.e., are pre-trends parallel).

*reload the saved estimates and replay them so the tests use this regression
estimates use "$raw_resultsfolder\m4_trends5yr"	
reghdfe
eststo m4_trends5yr

*Recorded results (each recorded test fails to reject the null hypothesis that
*	the differences are statistically equal):
*	CA Opening,    testnl: chi2(3)=5.94, p=0.1146
*	CA Opening,    test:   F(3, 376)=1.98, p=0.1166
*	CA Completion, testnl: chi2(3)=1.28, p=0.7344
*	CA Completion, test:   the earlier note repeated the chi2 figures from
*		testnl; -test- reports an F statistic here (roughly 1.28/3 = 0.43 on
*		(3, 376) df, judging by the CA Opening pair) - TODO re-run and record
*		the exact F and p-value.
foreach ev in open end {
	*build "diff(20_15) = diff(15_10) = diff(10_5) = diff(5_0)" for this event
	local hyp
	foreach w in 20_15 15_10 10_5 5_0 {
		local gap _b[dCA`ev'0_750_preyr`w']-_b[dCA`ev'750_1500_preyr`w']
		if "`hyp'" == "" {
			local hyp `gap'
		}
		else {
			local hyp `hyp' = `gap'
		}
	}
	*same hypothesis under both commands: testnl (chi2) then test (F)
	testnl `hyp'
	test `hyp'
}
	
*Differences around CA completion: these estimates, derived from the initial
*	regression, are what later populate Panel (b) of Figure 5 in main text.
*For every 5-year window, nlcom computes the 0_750 coefficient minus the
*	750_1500 coefficient; -post- makes the differences the active estimates.
local windows preyr20_15 preyr15_10 preyr10_5 preyr5_0 ///
	postyr0_5 postyr5_10 postyr10_15 postyr15_20 postyr20_25
local diffs
foreach w of local windows {
	local diffs `diffs' (diff`w': _b[dCAend0_750_`w']-_b[dCAend750_1500_`w'])
}
eststo m4_trends5yrCAend_diffs_end: nlcom `diffs',  post	
	
*equality of the four pre-period differences, tested on the posted nlcom results
test _b[diffpreyr20_15]=_b[diffpreyr15_10]=_b[diffpreyr10_5]=_b[diffpreyr5_0]	
	
*export the differences: table version (se and stars), then graph version
*	(plain, wide, confidence intervals)
esttab m4_trends5yrCAend_diffs_end ///
	using "$resultsfolder\EventStudy_CoefEstDiffs_CAEnd_Model4_5yrforTable.csv", ///
	replace label csv compress nogaps nolines ///
	star (* 0.10 ** 0.05 *** 0.01) b(4) se(4) scalars(ll) 
esttab m4_trends5yrCAend_diffs_end ///
	using "$resultsfolder\EventStudy_CoefEstDiffs_CAEnd_Model4_5yrforGraph.csv", ///
	replace label plain csv compress nogaps nolines ///
	nostar b(4) ci(4) wide noparentheses 
	
*Same exercise for CA opening. The saved regression is reloaded first because
*	an nlcom run with -post- replaces the active estimates.
estimates use "$raw_resultsfolder\m4_trends5yr"	
reghdfe
eststo m4_trends5yr

*These differences, derived from the initial regression estimates, are what
*	later populate Panel (a) of Figure 5 in main text.
*For every 5-year window, nlcom computes the 0_750 coefficient minus the
*	750_1500 coefficient; the CA-open lags run through postyr30_35.
local windows preyr20_15 preyr15_10 preyr10_5 preyr5_0 ///
	postyr0_5 postyr5_10 postyr10_15 postyr15_20 postyr20_25 ///
	postyr25_30 postyr30_35
local diffs
foreach w of local windows {
	local diffs `diffs' (diff`w': _b[dCAopen0_750_`w']-_b[dCAopen750_1500_`w'])
}
eststo m4_trends5yrCAopen_diffs: nlcom `diffs',  post
	
*equality of the four pre-period differences, tested on the posted nlcom results
test _b[diffpreyr20_15]=_b[diffpreyr15_10]=_b[diffpreyr10_5]=_b[diffpreyr5_0]

*export the differences: table version (se and stars), then graph version
*	(plain, wide, confidence intervals)
esttab m4_trends5yrCAopen_diffs ///
	using "$resultsfolder\EventStudy_CoefEstDiffs_CAOpen_Model4_5yrforTable.csv", ///
	replace label csv compress nogaps nolines ///
	star (* 0.10 ** 0.05 *** 0.01) b(4) se(4) scalars(ll) 
esttab m4_trends5yrCAopen_diffs ///
	using "$resultsfolder\EventStudy_CoefEstDiffs_CAOpen_Model4_5yrforGraph.csv", ///
	replace label plain csv compress nogaps nolines ///
	nostar b(4) ci(4) wide noparentheses 	
	

*********************************************************************************	
	
*Variant of model 6 w/ lead and lag terms (only includes CEM-matched sales 
*	within 1,500m of CA). Generates estimates for Figure A7 in Appendix F.3.		
	
*The 5km sample is still in memory at this point, so the load must discard it;
*	-use- documents -clear- (not -replace-) as the option for that.
use "$salesfolder\All_Sales_Final_Cleaned_CA1500m_CEM_MatchOnly", clear	

*Rebuild the global variable lists for the CEM-matched sample.

*TSD counts: reset to the closer distance bands only, cntTSD0_250 through
*	cntTSD1250_1500 (the bands from 1500 to 5000 are left out for this sample).
global cntTSD
forvalues lo = 0(250)1250 {
	local hi = `lo' + 250
	global cntTSD $cntTSD cntTSD`lo'_`hi'
}

*Lead (preyr) and lag (postyr) indicator lists in 5-year windows, one pair of
*	lists per distance bin (0_750, 750_1500) and per event (CA open, CA end).
*	The lag lists run through postyr30_35 for CA open and postyr20_25 for CA end.
local prewin       20_15 15_10 10_5 5_0
local postwin_open 0_5 5_10 10_15 15_20 20_25 25_30 30_35
local postwin_end  0_5 5_10 10_15 15_20 20_25
foreach b in 0_750 750_1500 {
	foreach ev in open end {
		local leads
		foreach w of local prewin {
			local leads `leads' dCA`ev'`b'_preyr`w'
		}
		local lags
		foreach w of local postwin_`ev' {
			local lags `lags' dCA`ev'`b'_postyr`w'
		}
		*globals are named CA<event>leads<bin> and CA<event>lags<bin>
		global CA`ev'leads`b' `leads'
		global CA`ev'lags`b' `lags'
	}
}
	
*Estimate the lead/lag variant of Model 6 on the CEM-matched sample (weighted by
*	cem_weights), keep it in memory and on disk, and export the lead/lag
*	coefficients.

*all lead and lag terms (both distance bins, CA open and CA end); reused in keep()
local evterms $CAopenleads0_750 $CAopenlags0_750 $CAopenleads750_1500 $CAopenlags750_1500 ///
	$CAendleads0_750 $CAendlags0_750 $CAendleads750_1500 $CAendlags750_1500
*absorbed terms: mycntyid-by-tranyr, mycntyid-by-quarter, and mytractid effects,
*	plus mycntyid-by-tranyr-specific slopes on the $house covariates
local fe i.mycntyid#i.tranyr i.mycntyid#i.quarter i.mytractid ///
	i.mycntyid#i.tranyr#c.($house)

*standard errors are clustered on mycntyid
reghdfe lnrprice $cntTSD `evterms' [pweight=cem_weights], absorb(`fe') ///
	vce(cluster mycntyid)  compact poolsize(20)	
eststo m6_trends5yr
estimates save "$raw_resultsfolder\m6_trends5yr", replace

*Table version: coefficients with standard errors and significance stars,
*	restricted to the lead/lag terms.
esttab m6_trends5yr using EventStudy_CoefEstimates_Model6_5yrforTable.csv, ///
	replace label csv compress nogaps nolines ///
	star (* 0.10 ** 0.05 *** 0.01) b(4) se(4) scalars(ll) ///
	keep(`evterms') 
	
*Graph version: plain wide layout with confidence intervals and no stars,
*	restricted to the same terms.
esttab m6_trends5yr using EventStudy_CoefEstimates_Model6_5yrforGraph.csv, ///
	replace label plain csv compress nogaps nolines ///
	nostar b(4) ci(4) wide noparentheses ///
	keep(`evterms') 

*Pre-trend check for Model 6: for each event, test whether the difference
*	between the 0_750 and 750_1500 coefficients is equal across the four
*	pre-event windows (i.e., are pre-trends parallel).

*reload the saved estimates and replay them so the tests use this regression
estimates use "$raw_resultsfolder\m6_trends5yr"
reghdfe	
eststo m6_trends5yr	

*Recorded results (every test fails to reject the null hypothesis that the
*	differences are statistically equal):
*	CA Opening,    testnl: chi2(3)=5.24, p=0.1551
*	CA Opening,    test:   F(3, 161)=1.75, p=0.1597
*	CA Completion, testnl: chi2(3)=1.08, p=0.7808
*	CA Completion, test:   F(3, 161)=0.36, p=0.7809
foreach ev in open end {
	*build "diff(20_15) = diff(15_10) = diff(10_5) = diff(5_0)" for this event
	local hyp
	foreach w in 20_15 15_10 10_5 5_0 {
		local gap _b[dCA`ev'0_750_preyr`w']-_b[dCA`ev'750_1500_preyr`w']
		if "`hyp'" == "" {
			local hyp `gap'
		}
		else {
			local hyp `hyp' = `gap'
		}
	}
	*same hypothesis under both commands: testnl (chi2) then test (F)
	testnl `hyp'
	test `hyp'
}
	
	
*END	



